# vim:fileencoding=utf-8
# License: BSD
# Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net>
# Copyright: 2013, Alexander Tsepkov

# globals: ρσ_iterator_symbol, ρσ_list_decorate

# basic implementation of Python's 're' library

from __python__ import bound_methods

# Alias DB from http://www.unicode.org/Public/8.0.0/ucd/NameAliases.txt {{{
_ALIAS_MAP = {"null":0,"nul":0,"start of heading":1,"soh":1,"start of text":2,"stx":2,"end of text":3,"etx":3,"end of transmission":4,"eot":4,"enquiry":5,"enq":5,"acknowledge":6,"ack":6,"alert":7,"bel":7,"backspace":8,"bs":8,"character tabulation":9,"horizontal tabulation":9,"ht":9,"tab":9,"line feed":10,"new line":10,"end of line":10,"lf":10,"nl":10,"eol":10,"line tabulation":11,"vertical tabulation":11,"vt":11,"form feed":12,"ff":12,"carriage return":13,"cr":13,"shift out":14,"locking-shift one":14,"so":14,"shift in":15,"locking-shift zero":15,"si":15,"data link escape":16,"dle":16,"device control one":17,"dc1":17,"device control two":18,"dc2":18,"device control three":19,"dc3":19,"device control four":20,"dc4":20,"negative acknowledge":21,"nak":21,"synchronous idle":22,"syn":22,"end of transmission block":23,"etb":23,"cancel":24,"can":24,"end of medium":25,"eom":25,"substitute":26,"sub":26,"escape":27,"esc":27,"information separator four":28,"file separator":28,"fs":28,"information separator three":29,"group separator":29,"gs":29,"information separator two":30,"record separator":30,"rs":30,"information separator one":31,"unit separator":31,"us":31,"sp":32,"delete":127,"del":127,"padding character":128,"pad":128,"high octet preset":129,"hop":129,"break permitted here":130,"bph":130,"no break here":131,"nbh":131,"index":132,"ind":132,"next line":133,"nel":133,"start of selected area":134,"ssa":134,"end of selected area":135,"esa":135,"character tabulation set":136,"horizontal tabulation set":136,"hts":136,"character tabulation with justification":137,"horizontal tabulation with justification":137,"htj":137,"line tabulation set":138,"vertical tabulation set":138,"vts":138,"partial line forward":139,"partial line down":139,"pld":139,"partial line backward":140,"partial line up":140,"plu":140,"reverse line feed":141,"reverse index":141,"ri":141,"single shift two":142,"single-shift-2":142,"ss2":142,"single shift three":143,"single-shift-3":143,"ss3":143,"device control string":144,"dcs":144,"private use one":145,"private use-1":145,"pu1":145,"private use two":146,"private use-2":146,"pu2":146,"set transmit state":147,"sts":147,"cancel character":148,"cch":148,"message waiting":149,"mw":149,"start of guarded area":150,"start of protected area":150,"spa":150,"end of guarded area":151,"end of protected area":151,"epa":151,"start of string":152,"sos":152,"single graphic character introducer":153,"sgc":153,"single character introducer":154,"sci":154,"control sequence introducer":155,"csi":155,"string terminator":156,"st":156,"operating system command":157,"osc":157,"privacy message":158,"pm":158,"application program command":159,"apc":159,"nbsp":160,"shy":173,"latin capital letter gha":418,"latin small letter gha":419,"cgj":847,"alm":1564,"syriac sublinear colon skewed left":1801,"kannada letter llla":3294,"lao letter fo fon":3741,"lao letter fo fay":3743,"lao letter ro":3747,"lao letter lo":3749,"tibetan mark bka- shog gi mgo rgyan":4048,"fvs1":6155,"fvs2":6156,"fvs3":6157,"mvs":6158,"zwsp":8203,"zwnj":8204,"zwj":8205,"lrm":8206,"rlm":8207,"lre":8234,"rle":8235,"pdf":8236,"lro":8237,"rlo":8238,"nnbsp":8239,"mmsp":8287,"wj":8288,"lri":8294,"rli":8295,"fsi":8296,"pdi":8297,"weierstrass elliptic function":8472,"micr on us symbol":9288,"micr dash symbol":9289,"leftwards triangle-headed arrow with double vertical stroke":11130,"rightwards triangle-headed arrow with double vertical stroke":11132,"yi syllable iteration mark":40981,"presentation form for vertical right white lenticular bracket":65048,"vs1":65024,"vs2":65025,"vs3":65026,"vs4":65027,"vs5":65028,"vs6":65029,"vs7":65030,"vs8":65031,"vs9":65032,"vs10":65033,"vs11":65034,"vs12":65035,"vs13":65036,"vs14":65037,"vs15":65038,"vs16":65039,"byte order mark":65279,"bom":65279,"zwnbsp":65279,"cuneiform sign nu11 tenu":74452,"cuneiform sign nu11 over nu11 bur over bur":74453,"byzantine musical symbol fthora skliron chroma vasis":118981,"vs17":917760,"vs18":917761,"vs19":917762,"vs20":917763,"vs21":917764,"vs22":917765,"vs23":917766,"vs24":917767,"vs25":917768,"vs26":917769,"vs27":917770,"vs28":917771,"vs29":917772,"vs30":917773,"vs31":917774,"vs32":917775,"vs33":917776,"vs34":917777,"vs35":917778,"vs36":917779,"vs37":917780,"vs38":917781,"vs39":917782,"vs40":917783,"vs41":917784,"vs42":917785,"vs43":917786,"vs44":917787,"vs45":917788,"vs46":917789,"vs47":917790,"vs48":917791,"vs49":917792,"vs50":917793,"vs51":917794,"vs52":917795,"vs53":917796,"vs54":917797,"vs55":917798,"vs56":917799,"vs57":917800,"vs58":917801,"vs59":917802,"vs60":917803,"vs61":917804,"vs62":917805,"vs63":917806,"vs64":917807,"vs65":917808,"vs66":917809,"vs67":917810,"vs68":917811,"vs69":917812,"vs70":917813,"vs71":917814,"vs72":917815,"vs73":917816,"vs74":917817,"vs75":917818,"vs76":917819,"vs77":917820,"vs78":917821,"vs79":917822,"vs80":917823,"vs81":917824,"vs82":917825,"vs83":917826,"vs84":917827,"vs85":917828,"vs86":917829,"vs87":917830,"vs88":917831,"vs89":917832,"vs90":917833,"vs91":917834,"vs92":917835,"vs93":917836,"vs94":917837,"vs95":917838,"vs96":917839,"vs97":917840,"vs98":917841,"vs99":917842,"vs100":917843,"vs101":917844,"vs102":917845,"vs103":917846,"vs104":917847,"vs105":917848,"vs106":917849,"vs107":917850,"vs108":917851,"vs109":917852,"vs110":917853,"vs111":917854,"vs112":917855,"vs113":917856,"vs114":917857,"vs115":917858,"vs116":917859,"vs117":917860,"vs118":917861,"vs119":917862,"vs120":917863,"vs121":917864,"vs122":917865,"vs123":917866,"vs124":917867,"vs125":917868,"vs126":917869,"vs127":917870,"vs128":917871,"vs129":917872,"vs130":917873,"vs131":917874,"vs132":917875,"vs133":917876,"vs134":917877,"vs135":917878,"vs136":917879,"vs137":917880,"vs138":917881,"vs139":917882,"vs140":917883,"vs141":917884,"vs142":917885,"vs143":917886,"vs144":917887,"vs145":917888,"vs146":917889,"vs147":917890,"vs148":917891,"vs149":917892,"vs150":917893,"vs151":917894,"vs152":917895,"vs153":917896,"vs154":917897,"vs155":917898,"vs156":917899,"vs157":917900,"vs158":917901,"vs159":917902,"vs160":917903,"vs161":917904,"vs162":917905,"vs163":917906,"vs164":917907,"vs165":917908,"vs166":917909,"vs167":917910,"vs168":917911,"vs169":917912,"vs170":917913,"vs171":917914,"vs172":917915,"vs173":917916,"vs174":917917,"vs175":917918,"vs176":917919,"vs177":917920,"vs178":917921,"vs179":917922,"vs180":917923,"vs181":917924,"vs182":917925,"vs183":917926,"vs184":917927,"vs185":917928,"vs186":917929,"vs187":917930,"vs188":917931,"vs189":917932,"vs190":917933,"vs191":917934,"vs192":917935,"vs193":917936,"vs194":917937,"vs195":917938,"vs196":917939,"vs197":917940,"vs198":917941,"vs199":917942,"vs200":917943,"vs201":917944,"vs202":917945,"vs203":917946,"vs204":917947,"vs205":917948,"vs206":917949,"vs207":917950,"vs208":917951,"vs209":917952,"vs210":917953,"vs211":917954,"vs212":917955,"vs213":917956,"vs214":917957,"vs215":917958,"vs216":917959,"vs217":917960,"vs218":917961,"vs219":917962,"vs220":917963,"vs221":917964,"vs222":917965,"vs223":917966,"vs224":917967,"vs225":917968,"vs226":917969,"vs227":917970,"vs228":917971,"vs229":917972,"vs230":917973,"vs231":917974,"vs232":917975,"vs233":917976,"vs234":917977,"vs235":917978,"vs236":917979,"vs237":917980,"vs238":917981,"vs239":917982,"vs240":917983,"vs241":917984,"vs242":917985,"vs243":917986,"vs244":917987,"vs245":917988,"vs246":917989,"vs247":917990,"vs248":917991,"vs249":917992,"vs250":917993,"vs251":917994,"vs252":917995,"vs253":917996,"vs254":917997,"vs255":917998,"vs256":917999}
# }}}

_ASCII_CONTROL_CHARS = {'a':7, 'b':8, 'f': 12, 'n': 10, 'r': 13, 't': 9, 'v': 11}
_HEX_PAT = /^[a-fA-F0-9]/
_NUM_PAT =  /^[0-9]/
_GROUP_PAT = /<([^>]+)>/
_NAME_PAT = /^[a-zA-Z ]/

I = IGNORECASE = 2
L = LOCALE = 4
M = MULTILINE = 8
D = DOTALL = 16
U = UNICODE = 32
X = VERBOSE = 64
DEBUG = 128
A = ASCII = 256

supports_unicode = RegExp.prototype.unicode is not undefined

_RE_ESCAPE = /[-\/\\^$*+?.()|[\]{}]/g

_re_cache_map = {}
_re_cache_items = v'[]'

error = SyntaxError  # This is the error JS throws for invalid regexps
has_prop = Object.prototype.hasOwnProperty.call.bind(Object.prototype.hasOwnProperty)

def _expand(groups, repl, group_name_map):
    i = 0

    def next():
        nonlocal i
        return v'repl[i++]'

    def peek():
        return repl[i]

    def read_digits(count, pat, base, maxval, prefix):
        ans = prefix or ''
        greedy = count is Number.MAX_VALUE
        while count > 0:
            count -= 1
            if not pat.test(peek()):
                if greedy:
                    break
                return ans
            ans += next()
        nval = parseInt(ans, base)
        if nval > maxval:
            return ans
        return nval

    def read_escape_sequence():
        nonlocal i
        q = next()
        if not q or q is '\\':
            return '\\'
        if '"\''.indexOf(q) is not -1:
            return q
        if _ASCII_CONTROL_CHARS[q]:
            return String.fromCharCode(_ASCII_CONTROL_CHARS[q])
        if '0' <= q <= '9':
            ans = read_digits(Number.MAX_VALUE, _NUM_PAT, 10, Number.MAX_VALUE, q)
            if jstype(ans) is 'number':
                return groups[ans] or ''
            return '\\' + ans
        if q is 'g':
            m = _GROUP_PAT.exec(repl[i:])
            if m is not None:
                i += m[0].length
                gn = m[1]
                if isNaN(parseInt(gn, 10)):
                    if not has_prop(group_name_map, gn):
                        return ''
                    gn = group_name_map[gn][-1]
                return groups[gn] or ''
        if q is 'x':
            code = read_digits(2, _HEX_PAT, 16, 0x10FFFF)
            if jstype(code) is 'number':
                return String.fromCharCode(code)
            return '\\x' + code
        if q is 'u':
            code = read_digits(4, _HEX_PAT, 16, 0x10FFFF)
            if jstype(code) is 'number':
                return String.fromCharCode(code)
            return '\\u' + code
        if q is 'U':
            code = read_digits(8, _HEX_PAT, 16, 0x10FFFF)
            if jstype(code) is 'number':
                if code <= 0xFFFF:
                    return String.fromCharCode(code)
                code -= 0x10000
                return String.fromCharCode(0xD800+(code>>10), 0xDC00+(code&0x3FF))
            return '\\U' + code
        if q is 'N' and peek() is '{':
            next()
            name = ''
            while _NAME_PAT.test(peek()):
                name += next()
            if peek() is not '}':
                return '\\N{' + name
            next()
            key = (name or '').toLowerCase()
            if not name or not has_prop(_ALIAS_MAP, key):
                return '\\N{' + name + '}'
            code = _ALIAS_MAP[key]
            if code <= 0xFFFF:
                return String.fromCharCode(code)
            code -= 0x10000
            return String.fromCharCode(0xD800+(code>>10), 0xDC00+(code&0x3FF))

        return '\\' + q

    ans = ch = ''
    while True:
        ch = next()
        if ch is '\\':
            ans += read_escape_sequence()
        elif not ch:
            break
        else:
            ans += ch
    return ans

def transform_regex(source, flags):
    pos = 0
    previous_backslash = in_class = False
    ans = ''
    group_map = {}
    flags = flags or 0
    group_count = 0

    while pos < source.length:
        ch = v'source[pos++]'
        if previous_backslash:
            ans += '\\' + ch
            previous_backslash = False
            continue

        if in_class:
            if ch is ']':
                in_class = False
            ans += ch
            continue

        if ch is '\\':
            previous_backslash = True
            continue

        if ch is '[':
            in_class = True
            if source[pos] is ']':  # in python the empty set is not allowed, instead []] is the same as [\]]
                pos += 1
                ch = r'[\]'
        elif ch is '(':
            if source[pos] is '?':
                extension = source[pos + 1]
                if extension is '#':
                    close = source.indexOf(')', pos + 1)
                    if close is -1:
                        raise ValueError('Expecting a closing )')
                    pos = close + 1
                    continue
                if 'aiLmsux'.indexOf(extension) is not -1:
                    flag_map = {'a':ASCII, 'i':IGNORECASE, 'L':LOCALE, 'm':MULTILINE, 's':DOTALL, 'u':UNICODE, 'x':VERBOSE}
                    close = source.indexOf(')', pos + 1)
                    if close is -1:
                        raise SyntaxError('Expecting a closing )')
                    flgs = source[pos+1:close]
                    for v'var i = 0; i < flgs.length; i++':
                        q = flgs[i]  # noqa:undef
                        if not has_prop(flag_map, q):
                            raise SyntaxError('Invalid flag: ' + q)
                        flags |= flag_map[q]
                    pos = close + 1
                    continue
                if extension is '(':
                    raise SyntaxError('Group existence assertions are not supported in JavaScript')
                if extension is 'P':
                    pos += 2
                    q = source[pos]
                    if q is '<':
                        close = source.indexOf('>', pos)
                        if close is -1:
                            raise SyntaxError('Named group not closed, expecting >')
                        name = source[pos+1:close]
                        if not has_prop(group_map, name):
                            group_map[name] = v'[]'
                        group_map[name].push(v'++group_count')
                        pos = close + 1
                    elif q is '=':
                        close = source.indexOf(')', pos)
                        if close is -1:
                            raise SyntaxError('Named group back-reference not closed, expecting a )')
                        name = source[pos+1:close]
                        if not isNaN(parseInt(name, 10)):
                            ans += '\\' + name
                        else:
                            if not has_prop(group_map, name):
                                raise SyntaxError('Invalid back-reference. The named group: ' + name + ' has not yet been defined.')
                            ans += '\\' + group_map[name][-1]
                        pos = close + 1
                        continue
                    else:
                        raise SyntaxError('Expecting < or = after (?P')
            else:
                group_count += 1
        elif ch is '.' and (flags & DOTALL):
            ans += r'[\s\S]'  # JavaScript has no DOTALL
            continue

        ans += ch

    return ans, flags, group_map

class MatchObject:

    def __init__(self, regex, match, pos, endpos):
        self.re = regex
        self.string = match.input
        self._start_pos = match.index
        self._groups = match
        self.pos, self.endpos = pos, endpos

    def _compute_extents(self):
        # compute start/end for each group
        match = self._groups
        self._start = v'Array(match.length)'
        self._end = v'Array(match.length)'
        self._start[0] = self._start_pos
        self._end[0] = self._start_pos + match[0].length
        offset = self._start_pos
        extent = match[0]
        loc = 0
        for v'var i = 1; i < match.length; i++':
            g = match[i]
            loc = extent.indexOf(g, loc)
            if loc is -1:
                self._start[i] = self._start[i-1]
                self._end[i] = self._end[i-1]
            else:
                self._start[i] = offset + loc
                loc += g.length
                self._end[i] = offset + loc # noqa:undef

    def groups(self, defval=None):
        ans = v'[]'
        for v'var i = 1; i < self._groups.length; i++':
            val = self._groups[i]  # noqa:undef
            if val is undefined:
                val = defval
            ans.push(val)
        return ans

    def _group_number(self, g):
        if jstype(g) is 'number':
            return g
        if has_prop(self.re.group_name_map, g):
            return self.re.group_name_map[g][-1]
        return g

    def _group_val(self, q, defval):
        val = undefined
        if jstype(q) is 'number' and -1 < q < self._groups.length:
            val = self._groups[q]
        else:
            if has_prop(self.re.group_name_map, q):
                val = self._groups[self.re.group_name_map[q][-1]]
        if val is undefined:
            val = defval
        return val

    def group(self):
        if arguments.length is 0:
            return self._groups[0]
        ans = v'[]'
        for v'var i = 0; i < arguments.length; i++':
            q = arguments[i]  # noqa:undef
            ans.push(self._group_val(q, None))
        return ans[0] if ans.length is 1 else ans

    def start(self, g):
        if self._start is undefined:
            self._compute_extents()
        val = self._start[self._group_number(g or 0)]
        if val is undefined:
            val = -1
        return val

    def end(self, g):
        if self._end is undefined:
            self._compute_extents()
        val = self._end[self._group_number(g or 0)]
        if val is undefined:
            val = -1
        return val

    def span(self, g):
        return [self.start(g), self.end(g)]

    def expand(self, repl):
        return _expand(repl, this._groups, this.re.group_name_map)

    def groupdict(self, defval=None):
        gnm = self.re.group_name_map
        names = Object.keys(gnm)
        ans = {}
        for v"var i = 0; i < names.length; i++":
            name = names[i]  # noqa:undef
            if has_prop(gnm, name):
                val = self._groups[gnm[name][-1]]
                if val is undefined:
                    val = defval
                ans[name] = val
        return ans

    def captures(self, group_name):
        ans = []
        if not has_prop(self.re.group_name_map, group_name):
            return ans
        groups = self.re.group_name_map[group_name]
        for v'var i = 0; i < groups.length; i++':
            val = self._groups[groups[i]]  # noqa:undef
            if val is not undefined:
                ans.push(val)
        return ans

    def capturesdict(self):
        gnm = self.re.group_name_map
        names = Object.keys(gnm)
        ans = {}
        for v'var i = 0; i < names.length; i++':
            name = names[i]  # noqa:undef
            ans[name] = self.captures(name)
        return ans

class RegexObject:

    def __init__(self, pattern, flags):
        self.pattern = pattern.source if isinstance(pattern, RegExp) else pattern
        self.js_pattern, self.flags, self.group_name_map = transform_regex(self.pattern, flags)

        modifiers = ''
        if self.flags & IGNORECASE: modifiers += 'i'
        if self.flags & MULTILINE: modifiers += 'm'
        if not (self.flags & ASCII) and supports_unicode:
            modifiers += 'u'
        self._modifiers = modifiers + 'g'
        self._pattern = RegExp(self.js_pattern, self._modifiers)

    def _do_search(self, pat, string, pos, endpos):
        pat.lastIndex = 0
        if endpos is not None:
            string = string[:endpos]
        while True:
            n = pat.exec(string)
            if n is None:
                return None
            if n.index >= pos:
                return MatchObject(self, n, pos, endpos)

    def search(self, string, pos=0, endpos=None):
        return self._do_search(self._pattern, string, pos, endpos)

    def match(self, string, pos=0, endpos=None):
        return self._do_search(RegExp('^' + self.js_pattern, self._modifiers), string, pos, endpos)

    def split(self, string, maxsplit=0):
        self._pattern.lastIndex = 0
        return string.split(self._pattern, maxsplit or undefined)

    def findall(self, string):
        self._pattern.lastIndex = 0
        return ρσ_list_decorate(string.match(self._pattern) or v'[]')

    def finditer(self, string):
        # We have to copy pat since lastIndex is mutable
        pat = RegExp(this._pattern.source, this._modifiers)  # noqa: unused-local
        ans = v"{'_string':string, '_r':pat, '_self':self}"
        ans[ρσ_iterator_symbol] = def():
            return this
        ans['next'] = def():
            m = this._r.exec(this._string)
            if m is None:
                return v"{'done':true}"
            return v"{'done':false, 'value':new MatchObject(this._self, m, 0, null)}"
        return ans

    def subn(self, repl, string, count=0):
        expand = _expand
        if jstype(repl) is 'function':
            expand = def(m, repl, gnm): return '' + repl(MatchObject(self, m, 0, None))
        this._pattern.lastIndex = 0
        num = 0
        matches = v'[]'

        while count < 1 or num < count:
            m = this._pattern.exec(string)
            if m is None:
                break
            matches.push(m)
            num += 1

        for v'var i = matches.length - 1; i > -1; i--':
            m = matches[i]  # noqa:undef
            start = m.index
            end = start + m[0].length
            string = string[:start] + expand(m, repl, self.group_name_map) + string[end:]
        return string, matches.length

    def sub(self, repl, string, count=0):
        return self.subn(repl, string, count)[0]

def _get_from_cache(pattern, flags):
    if isinstance(pattern, RegExp):
        pattern = pattern.source
    key = JSON.stringify(v'[pattern, flags]')
    if has_prop(_re_cache_map, key):
        return _re_cache_map[key]
    if _re_cache_items.length >= 100:
        v'delete _re_cache_map[_re_cache_items.shift()]'
    ans = RegexObject(pattern, flags)
    _re_cache_map[key] = ans
    _re_cache_items.push(key)
    return ans

def compile(pattern, flags=0):
    return _get_from_cache(pattern, flags)

def search(pattern, string, flags=0):
    return _get_from_cache(pattern, flags).search(string)

def match(pattern, string, flags=0):
    return _get_from_cache(pattern, flags).match(string)

def split(pattern, string, maxsplit=0, flags=0):
    return _get_from_cache(pattern, flags).split(string)

def findall(pattern, string, flags=0):
    return _get_from_cache(pattern, flags).findall(string)

def finditer(pattern, string, flags=0):
    return _get_from_cache(pattern, flags).finditer(string)

def sub(pattern, repl, string, count=0, flags=0):
    return _get_from_cache(pattern, flags).sub(repl, string, count)

def subn(pattern, repl, string, count=0, flags=0):
    return _get_from_cache(pattern, flags).subn(repl, string, count)

def escape(string):
    return string.replace(_RE_ESCAPE, '\\$&')

def purge():
    nonlocal _re_cache_map, _re_cache_items
    _re_cache_map = {}
    _re_cache_items = v'[]'
